home *** CD-ROM | disk | FTP | other *** search
Wrap
# Source Generated with Decompyle++
# File: in.pyc (Python 2.4)

"""Regression tests for robotparser.RobotFileParser.can_fetch().

Each RobotTest() call below parses one robots.txt document and registers
one test case per URL in the module-level ``tests`` suite: URLs listed as
"good" must be fetchable, URLs listed as "bad" must not be.
"""

import unittest
import StringIO
import robotparser
from test import test_support


class RobotTestCase(unittest.TestCase):
    """Check a single URL against an already-parsed robots.txt file."""

    def __init__(self, index, parser, url, good, agent):
        """Store the parameters of one check.

        index  -- number of the robots.txt document, used only in the label
        parser -- RobotFileParser that has already parsed the document
        url    -- URL string, or an (agent, url) tuple that overrides agent
        good   -- true if the URL is expected to be fetchable
        agent  -- user agent used when url is a plain string
        """
        unittest.TestCase.__init__(self)
        if good:
            self.str = 'RobotTest(%d, good, %s)' % (index, url)
        else:
            self.str = 'RobotTest(%d, bad, %s)' % (index, url)
        self.parser = parser
        self.url = url
        self.good = good
        self.agent = agent

    def runTest(self):
        """Assert that can_fetch() agrees with the expected outcome."""
        if isinstance(self.url, tuple):
            # A tuple entry carries its own user agent.
            (agent, url) = self.url
        else:
            url = self.url
            agent = self.agent
        if self.good:
            self.failUnless(self.parser.can_fetch(agent, url))
        else:
            self.failIf(self.parser.can_fetch(agent, url))

    def __str__(self):
        return self.str


# Suite filled in at import time by the RobotTest() calls below.
tests = unittest.TestSuite()


def RobotTest(index, robots_txt, good_urls, bad_urls,
              agent = 'test_robotparser'):
    """Parse robots_txt and add one test case per URL to ``tests``.

    index      -- number used to label the generated test cases
    robots_txt -- text of the robots.txt document
    good_urls  -- URLs (or (agent, url) tuples) that must be fetchable
    bad_urls   -- URLs (or (agent, url) tuples) that must be refused
    agent      -- user agent used for entries that are plain strings
    """
    lines = StringIO.StringIO(robots_txt).readlines()
    parser = robotparser.RobotFileParser()
    parser.parse(lines)
    # All cases generated from one document share the same parser.
    for url in good_urls:
        tests.addTest(RobotTestCase(index, parser, url, 1, agent))
    for url in bad_urls:
        tests.addTest(RobotTestCase(index, parser, url, 0, agent))


# 1. Rules for every agent, with trailing comments on the rule lines.
doc = ('\n'
       'User-agent: *\n'
       'Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n'
       'Disallow: /tmp/ # these will soon disappear\n'
       'Disallow: /foo.html\n')
good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']
RobotTest(1, doc, good, bad)

# 2. A second record with an empty Disallow for the agent "cybermapper".
doc = ('\n'
       '# robots.txt for http://www.example.com/\n'
       '\n'
       'User-agent: *\n'
       'Disallow: /cyberworld/map/ # This is an infinite virtual URL space\n'
       '\n'
       '# Cybermapper knows where to go.\n'
       'User-agent: cybermapper\n'
       'Disallow:\n'
       '\n')
good = ['/', '/test.html', ('cybermapper', '/cyberworld/map/index.html')]
bad = ['/cyberworld/map/index.html']
RobotTest(2, doc, good, bad)

# 3. Everything disallowed for every agent.
doc = ('\n'
       '# go away\n'
       'User-agent: *\n'
       'Disallow: /\n')
good = []
bad = ['/cyberworld/map/index.html', '/', '/tmp/']
RobotTest(3, doc, good, bad)

# 4. and 5. Agent-specific rules with percent-escapes; the same document
# is checked with the bare agent name and with a longer agent string.
doc = ('\n'
       'User-agent: figtree\n'
       'Disallow: /tmp\n'
       'Disallow: /a%3cd.html\n'
       'Disallow: /a%2fb.html\n'
       'Disallow: /%7ejoe/index.html\n')
good = []
bad = ['/tmp', '/tmp.html', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html',
       '/a%2fb.html', '/~joe/index.html']
RobotTest(4, doc, good, bad, 'figtree')
RobotTest(5, doc, good, bad, 'FigTree Robot libwww-perl/5.04')

# 6. "/tmp/" is disallowed, but "/tmp" itself is expected to stay allowed.
doc = ('\n'
       'User-agent: *\n'
       'Disallow: /tmp/\n'
       'Disallow: /a%3Cd.html\n'
       'Disallow: /a/b.html\n'
       'Disallow: /%7ejoe/index.html\n')
good = ['/tmp']
bad = ['/tmp/', '/tmp/a.html', '/a%3cd.html', '/a%3Cd.html', '/a/b.html',
       '/%7Ejoe/index.html']
RobotTest(6, doc, good, bad)

# 7. The rule "/." must not block "/foo.html".
doc = ('\n'
       'User-Agent: *\n'
       'Disallow: /.\n')
good = ['/foo.html']
bad = []
RobotTest(7, doc, good, bad)


def test_main():
    """Run every registered test case through test_support."""
    test_support.run_suite(tests)


if __name__ == '__main__':
    # The flag in test_support is lowercase "verbose"; assigning to
    # "Verbose" only created an unused attribute.
    test_support.verbose = 1
    test_main()